Seurat pipeline
getwd()
## [1] "/Users/matianyu/R_project/Seurat_V5"
setwd('/Users/matianyu/R_project/Seurat_V5/')
# prepare workspace
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(magrittr)
library(dplyr)
# load data
pbmc.data <- Read10X(data.dir = "./filtered_gene_bc_matrices/hg19/")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
pbmc
## An object of class Seurat
## 13714 features across 2700 samples within 1 assay
## Active assay: RNA (13714 features, 0 variable features)
## 1 layer present: counts
# count matrix
# pbmc[["RNA"]]$counts
# Lets examine a few genes in the first thirty cells
pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACATACAACCAC-1', 'AAACATTGAGCTAC-1', 'AAACATTGATCAGC-1' ... ]]
##
## CD3D 4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2 3 . . . . . 3 4 1 5
## TCL1A . . . . . . . . 1 . . . . . . . . . . . . 1 . . . . . . . .
## MS4A1 . 6 . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .
dense.size <- object.size(as.matrix(pbmc.data))
dense.size
## 709591472 bytes
sparse.size <- object.size(pbmc.data)
sparse.size
## 29905192 bytes
dense.size/sparse.size
## 23.7 bytes
# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
# Show QC metrics for the first 5 cells
head(pbmc@meta.data, 5)
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACATACAACCAC-1 pbmc3k 2419 779 3.0177759
## AAACATTGAGCTAC-1 pbmc3k 4903 1352 3.7935958
## AAACATTGATCAGC-1 pbmc3k 3147 1129 0.8897363
## AAACCGTGCTTCCG-1 pbmc3k 2639 960 1.7430845
## AAACCGTGTATGCG-1 pbmc3k 980 521 1.2244898
# Visualize QC metrics as a violin plot
VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc
## An object of class Seurat
## 13714 features across 2638 samples within 1 assay
## Active assay: RNA (13714 features, 0 variable features)
## 1 layer present: counts
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
## Normalizing layer: counts
#pbmc[["RNA"]]$counts
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(pbmc), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.

all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
# pbmc[["RNA"]]$scale.data
# suggest alter by SCTransform method
pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt")
## Regressing out percent.mt
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for scale.data
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CST3, TYROBP, LST1, AIF1, FTL, FTH1, LYZ, FCN1, S100A9, TYMP
## FCER1G, CFD, LGALS1, LGALS2, SERPINA1, S100A8, CTSS, IFITM3, SPI1, CFP
## PSAP, IFI30, COTL1, SAT1, S100A11, NPC2, GRN, LGALS3, GSTP1, PYCARD
## Negative: MALAT1, LTB, IL32, IL7R, CD2, B2M, ACAP1, CTSW, STK17A, CD27
## CD247, CCL5, GIMAP5, GZMA, AQP3, CST7, TRAF3IP3, SELL, GZMK, HOPX
## MAL, MYC, ITM2A, ETS1, LYAR, GIMAP7, KLRG1, NKG7, ZAP70, BEX2
## PC_ 2
## Positive: CD79A, MS4A1, TCL1A, HLA-DQA1, HLA-DQB1, HLA-DRA, LINC00926, CD79B, HLA-DRB1, CD74
## HLA-DMA, HLA-DPB1, HLA-DQA2, CD37, HLA-DRB5, HLA-DMB, HLA-DPA1, FCRLA, HVCN1, LTB
## BLNK, P2RX5, IGLL5, IRF8, SWAP70, ARHGAP24, FCGR2B, SMIM14, PPP1R14A, C16orf74
## Negative: NKG7, PRF1, CST7, GZMA, GZMB, FGFBP2, CTSW, GNLY, B2M, SPON2
## CCL4, GZMH, FCGR3A, CCL5, CD247, XCL2, CLIC3, AKR1C3, SRGN, HOPX
## TTC38, CTSC, APMAP, S100A4, IGFBP7, ANXA1, ID2, IL32, XCL1, RHOC
## PC_ 3
## Positive: HLA-DQA1, CD79A, CD79B, HLA-DQB1, HLA-DPA1, HLA-DPB1, CD74, MS4A1, HLA-DRB1, HLA-DRA
## HLA-DRB5, HLA-DQA2, TCL1A, LINC00926, HLA-DMB, HLA-DMA, CD37, HVCN1, FCRLA, IRF8
## PLAC8, BLNK, MALAT1, SMIM14, PLD4, IGLL5, SWAP70, P2RX5, LAT2, FCGR3A
## Negative: PPBP, PF4, SDPR, SPARC, GNG11, NRGN, GP9, RGS18, TUBB1, CLU
## HIST1H2AC, AP001189.4, ITGA2B, CD9, TMEM40, PTCRA, CA2, ACRBP, MMD, TREML1
## NGFRAP1, F13A1, SEPT5, RUFY1, TSC22D1, CMTM5, MPP1, MYL9, RP11-367G6.3, GP1BA
## PC_ 4
## Positive: HLA-DQA1, CD79B, CD79A, MS4A1, HLA-DQB1, CD74, HLA-DPB1, HIST1H2AC, PF4, HLA-DRB1
## HLA-DPA1, SDPR, TCL1A, HLA-DQA2, HLA-DRA, PPBP, LINC00926, GNG11, HLA-DRB5, SPARC
## GP9, PTCRA, AP001189.4, CA2, CD9, NRGN, RGS18, CLU, TUBB1, GZMB
## Negative: VIM, IL7R, S100A6, S100A8, IL32, S100A4, GIMAP7, S100A10, S100A9, MAL
## AQP3, CD14, CD2, LGALS2, FYB, GIMAP4, ANXA1, RBP7, CD27, FCN1
## LYZ, S100A12, MS4A6A, GIMAP5, S100A11, FOLR3, TRABD2A, AIF1, IL8, IFI6
## PC_ 5
## Positive: GZMB, FGFBP2, S100A8, NKG7, GNLY, CCL4, PRF1, CST7, SPON2, GZMA
## GZMH, LGALS2, S100A9, CCL3, XCL2, CD14, CLIC3, CTSW, MS4A6A, S100A12
## GSTP1, RBP7, IGFBP7, FOLR3, AKR1C3, TYROBP, CCL5, TTC38, XCL1, APMAP
## Negative: LTB, IL7R, CKB, MS4A7, RP11-290F20.3, AQP3, SIGLEC10, VIM, CYTIP, HMOX1
## LILRB2, PTGES3, HN1, CD2, CD27, FAM110A, ANXA5, CTD-2006K23.1, MAL, VMO1
## CORO1B, TUBA1B, LILRA3, GDI2, TRADD, IL32, ATP1A1, ABRACL, CCDC109B, PPA1
# Examine and visualize PCA results a few different ways
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: CST3, TYROBP, LST1, AIF1, FTL
## Negative: MALAT1, LTB, IL32, IL7R, CD2
## PC_ 2
## Positive: CD79A, MS4A1, TCL1A, HLA-DQA1, HLA-DQB1
## Negative: NKG7, PRF1, CST7, GZMA, GZMB
## PC_ 3
## Positive: HLA-DQA1, CD79A, CD79B, HLA-DQB1, HLA-DPA1
## Negative: PPBP, PF4, SDPR, SPARC, GNG11
## PC_ 4
## Positive: HLA-DQA1, CD79B, CD79A, MS4A1, HLA-DQB1
## Negative: VIM, IL7R, S100A6, S100A8, IL32
## PC_ 5
## Positive: GZMB, FGFBP2, S100A8, NKG7, GNLY
## Negative: LTB, IL7R, CKB, MS4A7, RP11-290F20.3
VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")

DimPlot(pbmc, reduction = "pca") + NoLegend()

# DimPlot(pbmc, reduction = "pca",group.by = "orig.ident")
DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)

DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)

ElbowPlot(pbmc)

# Method 2 for determin the dimensionality of the dataset
# NOTE: This process can take a long time for big datasets, comment out for expediency.
pbmc <- JackStraw(pbmc, dims = 25,num.replicate = 100)
pbmc <- ScoreJackStraw(pbmc, dims = 1:25)
JackStrawPlot(pbmc, dims = 1:25)
## Warning: Removed 41439 rows containing missing values or values outside the scale range (`geom_point()`).

pbmc <- FindNeighbors(pbmc, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
pbmc <- FindClusters(pbmc, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2638
## Number of edges: 95905
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8735
## Number of communities: 9
## Elapsed time: 0 seconds
# Look at cluster IDs of the first 5 cells
head(Idents(pbmc), 5)
## AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1 AAACCGTGTATGCG-1
## 0 3 2 1 6
## Levels: 0 1 2 3 4 5 6 7 8
pbmc <- RunUMAP(pbmc, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 04:55:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 04:55:01 Read 2638 rows and found 10 numeric columns
## 04:55:01 Using Annoy for neighbor search, n_neighbors = 30
## 04:55:01 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 04:55:01 Writing NN index file to temp file /var/folders/zk/qm3tj_dj2xs0jr03sj14gb300000gn/T//Rtmpz4Wiya/file17d4256b95815
## 04:55:01 Searching Annoy index using 1 thread, search_k = 3000
## 04:55:02 Annoy recall = 100%
## 04:55:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 04:55:03 Initializing from normalized Laplacian + noise (using RSpectra)
## 04:55:03 Commencing optimization for 500 epochs, with 106310 positive edges
## 04:55:06 Optimization finished
# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
DimPlot(pbmc, reduction = "umap")

DimPlot(pbmc, reduction = "pca")

# pbmc <- RunTSNE(pbmc, dims = 1:10)
# reduction = "tsne"
saveRDS(pbmc, file = "./pbmc_tutorial.rds")
# for single cluster
# find all markers of cluster 2
cluster2.markers <- FindMarkers(pbmc, ident.1 = 2)
head(cluster2.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## LTB 1.709675e-83 1.330256 0.982 0.647 2.344648e-79
## IL32 5.076510e-83 1.242930 0.947 0.471 6.961926e-79
## LDHB 2.467055e-68 1.044820 0.967 0.615 3.383320e-64
## CD3D 1.817480e-66 1.058609 0.920 0.438 2.492492e-62
## IL7R 8.698894e-61 1.389909 0.744 0.333 1.192966e-56
# design DE group
# find all markers distinguishing cluster 5 from clusters 0 and 3
cluster5.markers <- FindMarkers(pbmc, ident.1 = 5, ident.2 = c(0, 3))
head(cluster5.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## FCGR3A 5.972471e-204 6.795991 0.975 0.041 8.190647e-200
## IFITM3 5.671364e-195 6.201036 0.975 0.048 7.777708e-191
## CFD 2.389538e-193 6.081028 0.937 0.038 3.277012e-189
## CD68 1.800066e-189 5.472200 0.925 0.036 2.468611e-185
## RP11-290F20.3 6.852416e-189 6.390800 0.843 0.015 9.397404e-185
# find markers for every cluster compared to all remaining cells, report only the positive
# ones
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
pbmc.markers
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## RPS6 2.937899e-145 0.6898595 1.000 0.995 4.029035e-141 0 RPS6
## RPS12 4.537276e-144 0.7396535 1.000 0.991 6.222420e-140 0 RPS12
## RPL32 1.899782e-140 0.6330085 0.999 0.995 2.605361e-136 0 RPL32
## RPS27 2.256206e-137 0.7261693 0.999 0.992 3.094161e-133 0 RPS27
## RPS14 1.833572e-133 0.6367519 1.000 0.994 2.514561e-129 0 RPS14
## RPS25 2.202430e-129 0.7911714 0.997 0.975 3.020412e-125 0 RPS25
## RPL31 9.519837e-121 0.7770798 0.999 0.963 1.305550e-116 0 RPL31
## RPL9 1.093296e-119 0.7735097 0.999 0.970 1.499346e-115 0 RPL9
## RPS3A 1.360323e-116 0.8204878 0.999 0.974 1.865546e-112 0 RPS3A
## RPS3 4.735309e-115 0.6026055 1.000 0.994 6.494002e-111 0 RPS3
## LDHB 5.315619e-114 1.1863137 0.912 0.591 7.289840e-110 0 LDHB
## RPL13 8.785264e-114 0.5558595 1.000 0.996 1.204811e-109 0 RPL13
## RPL3 1.259214e-110 0.6016573 0.997 0.995 1.726886e-106 0 RPL3
## RPL30 1.616151e-110 0.6806377 1.000 0.979 2.216390e-106 0 RPL30
## RPL21 3.071789e-110 0.6770451 0.997 0.991 4.212651e-106 0 RPL21
## RPS15A 1.166374e-108 0.6527358 0.997 0.983 1.599565e-104 0 RPS15A
## RPS27A 3.958327e-106 0.7768403 0.997 0.966 5.428449e-102 0 RPS27A
## RPLP2 3.902958e-103 0.6034904 1.000 0.990 5.352517e-99 0 RPLP2
## EEF1A1 1.093339e-98 0.5369938 0.994 0.991 1.499405e-94 0 EEF1A1
## RPS13 1.074943e-97 0.7129838 0.987 0.961 1.474177e-93 0 RPS13
## RPS18 5.180602e-95 0.5265658 1.000 0.996 7.104678e-91 0 RPS18
## RPL11 8.215087e-93 0.4714148 1.000 0.995 1.126617e-88 0 RPL11
## RPL13A 6.472793e-92 0.4780475 1.000 0.999 8.876788e-88 0 RPL13A
## MALAT1 1.084174e-89 0.6441156 1.000 0.999 1.486836e-85 0 MALAT1
## RPL27A 1.612236e-88 0.5373650 0.999 0.988 2.211020e-84 0 RPL27A
## RPL35A 2.095224e-88 0.5867145 1.000 0.980 2.873390e-84 0 RPL35A
## RPS28 1.289928e-87 0.6408666 0.991 0.969 1.769007e-83 0 RPS28
## RPS23 8.996629e-87 0.5938163 1.000 0.976 1.233798e-82 0 RPS23
## RPS29 2.279008e-86 0.7683730 0.993 0.903 3.125432e-82 0 RPS29
## TPT1 7.840418e-86 0.6067410 0.997 0.982 1.075235e-81 0 TPT1
## RPS20 2.359727e-85 0.7100360 0.991 0.941 3.236130e-81 0 RPS20
## RPL23A 7.505228e-84 0.5568751 1.000 0.988 1.029267e-79 0 RPL23A
## CCR7 1.305705e-83 2.3501488 0.439 0.110 1.790644e-79 0 CCR7
## RPS4X 5.341427e-82 0.5230436 0.997 0.992 7.325233e-78 0 RPS4X
## CD3D 2.613397e-78 1.0628875 0.850 0.403 3.584012e-74 0 CD3D
## RPL5 1.082456e-76 0.6075214 0.988 0.969 1.484480e-72 0 RPL5
## RPL10 2.064950e-75 0.3709084 1.000 0.997 2.831872e-71 0 RPL10
## RPS16 1.223484e-73 0.5411983 0.996 0.982 1.677886e-69 0 RPS16
## RPL19 9.843176e-73 0.4022453 0.999 0.993 1.349893e-68 0 RPL19
## RPL10A 7.091479e-71 0.5470559 0.991 0.985 9.725254e-67 0 RPL10A
## RPSA 2.882226e-68 0.5854662 0.988 0.948 3.952685e-64 0 RPSA
## RPL14 6.585636e-64 0.4447083 0.996 0.985 9.031541e-60 0 RPL14
## RPS10 1.332199e-61 0.5371322 0.991 0.968 1.826978e-57 0 RPS10
## RPS5 2.436058e-61 0.5184481 0.990 0.975 3.340810e-57 0 RPS5
## NPM1 9.738318e-61 0.7971905 0.922 0.797 1.335513e-56 0 NPM1
## RPS15 7.595881e-60 0.4177772 1.000 0.989 1.041699e-55 0 RPS15
## RPL18 1.237903e-57 0.5038895 0.999 0.978 1.697661e-53 0 RPL18
## RPLP0 1.978925e-57 0.5476535 0.988 0.938 2.713897e-53 0 RPLP0
## RPL36 9.837428e-57 0.5491005 0.990 0.948 1.349105e-52 0 RPL36
## CD3E 5.885316e-55 1.0274342 0.731 0.398 8.071122e-51 0 CD3E
## RPL7 2.243682e-53 0.5914250 0.981 0.966 3.076986e-49 0 RPL7
## RPL36A 1.706982e-51 0.6163363 0.965 0.887 2.340956e-47 0 RPL36A
## LEF1 3.911145e-50 2.1066714 0.338 0.104 5.363744e-46 0 LEF1
## RPL4 9.797668e-50 0.5170381 0.994 0.956 1.343652e-45 0 RPL4
## RPS19 4.943950e-49 0.3765976 0.999 0.992 6.780134e-45 0 RPS19
## RPS2 8.998587e-49 0.3366035 1.000 0.995 1.234066e-44 0 RPS2
## RPL17 7.052454e-48 0.4772102 0.977 0.957 9.671735e-44 0 RPL17
## EEF1B2 1.972361e-47 0.5724925 0.945 0.867 2.704896e-43 0 EEF1B2
## NOSIP 2.532206e-47 1.2285845 0.624 0.360 3.472667e-43 0 NOSIP
## RPL22 8.502524e-47 0.7086351 0.913 0.776 1.166036e-42 0 RPL22
## RPS8 8.809197e-47 0.4319487 0.997 0.984 1.208093e-42 0 RPS8
## RPL18A 1.000407e-46 0.3206133 0.999 0.992 1.371958e-42 0 RPL18A
## PRKCQ-AS1 5.109668e-46 2.0421152 0.335 0.109 7.007398e-42 0 PRKCQ-AS1
## PIK3IP1 5.487008e-43 1.5149033 0.438 0.186 7.524882e-39 0 PIK3IP1
## TMEM66 5.573516e-43 0.8245007 0.850 0.698 7.643520e-39 0 TMEM66
## RPS26 5.330005e-42 0.5299071 0.965 0.897 7.309569e-38 0 RPS26
## BTG1 9.098088e-42 0.5963114 0.947 0.858 1.247712e-37 0 BTG1
## JUNB 3.637979e-41 0.7038651 0.913 0.904 4.989125e-37 0 JUNB
## GLTSCR2 4.347251e-41 0.6219340 0.916 0.804 5.961820e-37 0 GLTSCR2
## FHIT 9.171236e-41 2.7283106 0.199 0.040 1.257743e-36 0 FHIT
## IL7R 3.785547e-40 0.9511342 0.613 0.328 5.191499e-36 0 IL7R
## RPL38 9.709762e-40 0.5534875 0.952 0.872 1.331597e-35 0 RPL38
## RPS21 3.778434e-38 0.5389210 0.942 0.838 5.181744e-34 0 RPS21
## RPLP1 1.229862e-37 0.3157365 0.999 0.996 1.686633e-33 0 RPLP1
## CD7 4.521902e-37 0.8346521 0.571 0.295 6.201337e-33 0 CD7
## RPL35 2.614687e-36 0.4065346 0.993 0.964 3.585781e-32 0 RPL35
## RPL12 3.650663e-35 0.2977780 1.000 0.989 5.006519e-31 0 RPL12
## RPL24 8.842800e-35 0.4278003 0.984 0.942 1.212702e-30 0 RPL24
## RPL37 8.721650e-34 0.4374005 0.987 0.942 1.196087e-29 0 RPL37
## TCF7 1.257656e-33 1.3168493 0.390 0.177 1.724750e-29 0 TCF7
## MAL 7.989692e-33 1.8871824 0.263 0.088 1.095706e-28 0 MAL
## C6orf48 2.257274e-32 0.9450448 0.699 0.491 3.095626e-28 0 C6orf48
## RPL34 1.934954e-31 0.6268241 0.931 0.902 2.653596e-27 0 RPL34
## RPSAP58 1.586125e-30 0.5962843 0.848 0.696 2.175212e-26 0 RPSAP58
## RPL27 2.169278e-30 0.3826747 0.977 0.951 2.974948e-26 0 RPL27
## NELL2 6.989473e-30 2.1982153 0.156 0.033 9.585363e-26 0 NELL2
## RPL37A 8.086232e-30 0.3776142 0.986 0.945 1.108946e-25 0 RPL37A
## CD27 1.582455e-29 0.9705380 0.399 0.183 2.170179e-25 0 CD27
## RPS4Y1 3.345104e-29 0.4904763 0.939 0.869 4.587476e-25 0 RPS4Y1
## LTB 4.574471e-29 0.4311664 0.903 0.633 6.273430e-25 0 LTB
## RPL6 1.421552e-28 0.3022301 0.999 0.992 1.949516e-24 0 RPL6
## LINC00176 8.346010e-28 2.5390247 0.146 0.031 1.144572e-23 0 LINC00176
## RPL7A 1.703449e-27 0.3293995 0.987 0.975 2.336110e-23 0 RPL7A
## LDLRAP1 2.865661e-27 2.3083384 0.240 0.087 3.929967e-23 0 LDLRAP1
## LEPROTL1 3.988222e-27 1.1531127 0.405 0.208 5.469447e-23 0 LEPROTL1
## TRABD2A 6.169276e-27 2.2714828 0.197 0.060 8.460545e-23 0 TRABD2A
## RPL15 2.248364e-25 0.2783623 0.996 0.996 3.083407e-21 0 RPL15
## SH3YL1 1.113838e-23 1.7373622 0.197 0.065 1.527517e-19 0 SH3YL1
## ADTRP 1.463116e-23 3.0056072 0.101 0.016 2.006517e-19 0 ADTRP
## AES 2.672638e-23 0.6863009 0.645 0.439 3.665256e-19 0 AES
## OXNAD1 1.164692e-22 1.7131223 0.217 0.082 1.597259e-18 0 OXNAD1
## RGCC 3.050476e-22 1.0887431 0.360 0.184 4.183423e-18 0 RGCC
## FLT3LG 4.101801e-21 1.4036755 0.282 0.134 5.625210e-17 0 FLT3LG
## RPS7 5.825889e-21 0.2832534 0.997 0.987 7.989625e-17 0 RPS7
## RPL29 9.492159e-21 0.2462272 0.994 0.984 1.301755e-16 0 RPL29
## EPHX2 1.518260e-20 2.2132803 0.121 0.030 2.082142e-16 0 EPHX2
## RPS9 1.774342e-20 0.2657734 0.999 0.986 2.433332e-16 0 RPS9
## RHOH 2.313540e-20 1.0868955 0.376 0.206 3.172789e-16 0 RHOH
## C12orf57 4.612279e-20 1.1083841 0.426 0.257 6.325279e-16 0 C12orf57
## RPL26 5.113798e-20 0.2416266 0.996 0.991 7.013063e-16 0 RPL26
## TMEM123 5.544099e-20 0.7526939 0.549 0.384 7.603177e-16 0 TMEM123
## NDFIP1 6.360731e-20 1.0778784 0.444 0.283 8.723107e-16 0 NDFIP1
## LCK 1.276374e-19 0.7912599 0.500 0.312 1.750420e-15 0 LCK
## RPL28 2.327206e-19 0.1962027 1.000 0.993 3.191530e-15 0 RPL28
## C14orf64 7.044380e-19 2.1687455 0.118 0.031 9.660663e-15 0 C14orf64
## HNRNPA1 1.189733e-18 0.3177602 0.961 0.918 1.631600e-14 0 HNRNPA1
## TOMM7 1.280213e-18 0.4789868 0.841 0.757 1.755684e-14 0 TOMM7
## GIMAP5 2.435741e-18 0.7639453 0.408 0.243 3.340375e-14 0 GIMAP5
## CD3G 2.478344e-18 0.9408833 0.335 0.178 3.398801e-14 0 CD3G
## NGFRAP1 2.739478e-18 1.0610024 0.172 0.062 3.756921e-14 0 NGFRAP1
## SCGB3A1 3.277375e-18 2.4404345 0.114 0.030 4.494593e-14 0 SCGB3A1
## IL32 7.367129e-18 0.2280903 0.772 0.474 1.010328e-13 0 IL32
## RP11-664D1.1 7.448676e-18 2.1001652 0.121 0.034 1.021511e-13 0 RP11-664D1.1
## HINT1 7.543242e-18 0.4675713 0.741 0.606 1.034480e-13 0 HINT1
## APBA2 1.376412e-17 2.2130728 0.114 0.031 1.887611e-13 0 APBA2
## SELL 3.822182e-17 0.6231188 0.595 0.429 5.241740e-13 0 SELL
## CD8B 5.024126e-17 1.4163830 0.215 0.096 6.890086e-13 0 CD8B
## NUCB2 1.057363e-16 1.3533374 0.201 0.086 1.450067e-12 0 NUCB2
## RGS10 2.710405e-16 0.6854698 0.527 0.377 3.717050e-12 0 RGS10
## TSHZ2 3.315123e-16 2.4487436 0.092 0.022 4.546360e-12 0 TSHZ2
## RP11-291B21.2 7.275789e-16 2.4564465 0.087 0.020 9.978017e-12 0 RP11-291B21.2
## MYC 1.834328e-15 1.0705858 0.236 0.116 2.515598e-11 0 MYC
## ACTN1 2.834538e-15 1.1963538 0.142 0.050 3.887285e-11 0 ACTN1
## GYPC 3.189151e-15 0.8552184 0.397 0.255 4.373602e-11 0 GYPC
## EIF4A2 3.707051e-15 0.5340309 0.738 0.606 5.083849e-11 0 EIF4A2
## AAK1 3.872967e-15 1.2490141 0.231 0.115 5.311386e-11 0 AAK1
## TRAT1 4.807266e-15 1.0646944 0.264 0.141 6.592684e-11 0 TRAT1
## RPL8 7.486744e-15 0.2331500 0.993 0.986 1.026732e-10 0 RPL8
## GNB2L1 7.876619e-15 0.2572562 0.990 0.977 1.080200e-10 0 GNB2L1
## DNAJB1 2.370874e-14 0.9701762 0.390 0.257 3.251416e-10 0 DNAJB1
## BEX2 6.175741e-14 1.0635440 0.184 0.081 8.469411e-10 0 BEX2
## CD6 1.137212e-13 1.2797712 0.185 0.084 1.559572e-09 0 CD6
## [ reached 'max' / getOption("max.print") -- omitted 11564 rows ]
pbmc.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1)
## # A tibble: 7,024 × 7
## # Groups: cluster [9]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 5.32e-114 1.19 0.912 0.591 7.29e-110 0 LDHB
## 2 1.31e- 83 2.35 0.439 0.11 1.79e- 79 0 CCR7
## 3 2.61e- 78 1.06 0.85 0.403 3.58e- 74 0 CD3D
## 4 5.89e- 55 1.03 0.731 0.398 8.07e- 51 0 CD3E
## 5 3.91e- 50 2.11 0.338 0.104 5.36e- 46 0 LEF1
## 6 2.53e- 47 1.23 0.624 0.36 3.47e- 43 0 NOSIP
## 7 5.11e- 46 2.04 0.335 0.109 7.01e- 42 0 PRKCQ-AS1
## 8 5.49e- 43 1.51 0.438 0.186 7.52e- 39 0 PIK3IP1
## 9 9.17e- 41 2.73 0.199 0.04 1.26e- 36 0 FHIT
## 10 1.26e- 33 1.32 0.39 0.177 1.72e- 29 0 TCF7
## # ℹ 7,014 more rows
pbmc.markers
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## RPS6 2.937899e-145 0.6898595 1.000 0.995 4.029035e-141 0 RPS6
## RPS12 4.537276e-144 0.7396535 1.000 0.991 6.222420e-140 0 RPS12
## RPL32 1.899782e-140 0.6330085 0.999 0.995 2.605361e-136 0 RPL32
## RPS27 2.256206e-137 0.7261693 0.999 0.992 3.094161e-133 0 RPS27
## RPS14 1.833572e-133 0.6367519 1.000 0.994 2.514561e-129 0 RPS14
## RPS25 2.202430e-129 0.7911714 0.997 0.975 3.020412e-125 0 RPS25
## RPL31 9.519837e-121 0.7770798 0.999 0.963 1.305550e-116 0 RPL31
## RPL9 1.093296e-119 0.7735097 0.999 0.970 1.499346e-115 0 RPL9
## RPS3A 1.360323e-116 0.8204878 0.999 0.974 1.865546e-112 0 RPS3A
## RPS3 4.735309e-115 0.6026055 1.000 0.994 6.494002e-111 0 RPS3
## LDHB 5.315619e-114 1.1863137 0.912 0.591 7.289840e-110 0 LDHB
## RPL13 8.785264e-114 0.5558595 1.000 0.996 1.204811e-109 0 RPL13
## RPL3 1.259214e-110 0.6016573 0.997 0.995 1.726886e-106 0 RPL3
## RPL30 1.616151e-110 0.6806377 1.000 0.979 2.216390e-106 0 RPL30
## RPL21 3.071789e-110 0.6770451 0.997 0.991 4.212651e-106 0 RPL21
## RPS15A 1.166374e-108 0.6527358 0.997 0.983 1.599565e-104 0 RPS15A
## RPS27A 3.958327e-106 0.7768403 0.997 0.966 5.428449e-102 0 RPS27A
## RPLP2 3.902958e-103 0.6034904 1.000 0.990 5.352517e-99 0 RPLP2
## EEF1A1 1.093339e-98 0.5369938 0.994 0.991 1.499405e-94 0 EEF1A1
## RPS13 1.074943e-97 0.7129838 0.987 0.961 1.474177e-93 0 RPS13
## RPS18 5.180602e-95 0.5265658 1.000 0.996 7.104678e-91 0 RPS18
## RPL11 8.215087e-93 0.4714148 1.000 0.995 1.126617e-88 0 RPL11
## RPL13A 6.472793e-92 0.4780475 1.000 0.999 8.876788e-88 0 RPL13A
## MALAT1 1.084174e-89 0.6441156 1.000 0.999 1.486836e-85 0 MALAT1
## RPL27A 1.612236e-88 0.5373650 0.999 0.988 2.211020e-84 0 RPL27A
## RPL35A 2.095224e-88 0.5867145 1.000 0.980 2.873390e-84 0 RPL35A
## RPS28 1.289928e-87 0.6408666 0.991 0.969 1.769007e-83 0 RPS28
## RPS23 8.996629e-87 0.5938163 1.000 0.976 1.233798e-82 0 RPS23
## RPS29 2.279008e-86 0.7683730 0.993 0.903 3.125432e-82 0 RPS29
## TPT1 7.840418e-86 0.6067410 0.997 0.982 1.075235e-81 0 TPT1
## RPS20 2.359727e-85 0.7100360 0.991 0.941 3.236130e-81 0 RPS20
## RPL23A 7.505228e-84 0.5568751 1.000 0.988 1.029267e-79 0 RPL23A
## CCR7 1.305705e-83 2.3501488 0.439 0.110 1.790644e-79 0 CCR7
## RPS4X 5.341427e-82 0.5230436 0.997 0.992 7.325233e-78 0 RPS4X
## CD3D 2.613397e-78 1.0628875 0.850 0.403 3.584012e-74 0 CD3D
## RPL5 1.082456e-76 0.6075214 0.988 0.969 1.484480e-72 0 RPL5
## RPL10 2.064950e-75 0.3709084 1.000 0.997 2.831872e-71 0 RPL10
## RPS16 1.223484e-73 0.5411983 0.996 0.982 1.677886e-69 0 RPS16
## RPL19 9.843176e-73 0.4022453 0.999 0.993 1.349893e-68 0 RPL19
## RPL10A 7.091479e-71 0.5470559 0.991 0.985 9.725254e-67 0 RPL10A
## RPSA 2.882226e-68 0.5854662 0.988 0.948 3.952685e-64 0 RPSA
## RPL14 6.585636e-64 0.4447083 0.996 0.985 9.031541e-60 0 RPL14
## RPS10 1.332199e-61 0.5371322 0.991 0.968 1.826978e-57 0 RPS10
## RPS5 2.436058e-61 0.5184481 0.990 0.975 3.340810e-57 0 RPS5
## NPM1 9.738318e-61 0.7971905 0.922 0.797 1.335513e-56 0 NPM1
## RPS15 7.595881e-60 0.4177772 1.000 0.989 1.041699e-55 0 RPS15
## RPL18 1.237903e-57 0.5038895 0.999 0.978 1.697661e-53 0 RPL18
## RPLP0 1.978925e-57 0.5476535 0.988 0.938 2.713897e-53 0 RPLP0
## RPL36 9.837428e-57 0.5491005 0.990 0.948 1.349105e-52 0 RPL36
## CD3E 5.885316e-55 1.0274342 0.731 0.398 8.071122e-51 0 CD3E
## RPL7 2.243682e-53 0.5914250 0.981 0.966 3.076986e-49 0 RPL7
## RPL36A 1.706982e-51 0.6163363 0.965 0.887 2.340956e-47 0 RPL36A
## LEF1 3.911145e-50 2.1066714 0.338 0.104 5.363744e-46 0 LEF1
## RPL4 9.797668e-50 0.5170381 0.994 0.956 1.343652e-45 0 RPL4
## RPS19 4.943950e-49 0.3765976 0.999 0.992 6.780134e-45 0 RPS19
## RPS2 8.998587e-49 0.3366035 1.000 0.995 1.234066e-44 0 RPS2
## RPL17 7.052454e-48 0.4772102 0.977 0.957 9.671735e-44 0 RPL17
## EEF1B2 1.972361e-47 0.5724925 0.945 0.867 2.704896e-43 0 EEF1B2
## NOSIP 2.532206e-47 1.2285845 0.624 0.360 3.472667e-43 0 NOSIP
## RPL22 8.502524e-47 0.7086351 0.913 0.776 1.166036e-42 0 RPL22
## RPS8 8.809197e-47 0.4319487 0.997 0.984 1.208093e-42 0 RPS8
## RPL18A 1.000407e-46 0.3206133 0.999 0.992 1.371958e-42 0 RPL18A
## PRKCQ-AS1 5.109668e-46 2.0421152 0.335 0.109 7.007398e-42 0 PRKCQ-AS1
## PIK3IP1 5.487008e-43 1.5149033 0.438 0.186 7.524882e-39 0 PIK3IP1
## TMEM66 5.573516e-43 0.8245007 0.850 0.698 7.643520e-39 0 TMEM66
## RPS26 5.330005e-42 0.5299071 0.965 0.897 7.309569e-38 0 RPS26
## BTG1 9.098088e-42 0.5963114 0.947 0.858 1.247712e-37 0 BTG1
## JUNB 3.637979e-41 0.7038651 0.913 0.904 4.989125e-37 0 JUNB
## GLTSCR2 4.347251e-41 0.6219340 0.916 0.804 5.961820e-37 0 GLTSCR2
## FHIT 9.171236e-41 2.7283106 0.199 0.040 1.257743e-36 0 FHIT
## IL7R 3.785547e-40 0.9511342 0.613 0.328 5.191499e-36 0 IL7R
## RPL38 9.709762e-40 0.5534875 0.952 0.872 1.331597e-35 0 RPL38
## RPS21 3.778434e-38 0.5389210 0.942 0.838 5.181744e-34 0 RPS21
## RPLP1 1.229862e-37 0.3157365 0.999 0.996 1.686633e-33 0 RPLP1
## CD7 4.521902e-37 0.8346521 0.571 0.295 6.201337e-33 0 CD7
## RPL35 2.614687e-36 0.4065346 0.993 0.964 3.585781e-32 0 RPL35
## RPL12 3.650663e-35 0.2977780 1.000 0.989 5.006519e-31 0 RPL12
## RPL24 8.842800e-35 0.4278003 0.984 0.942 1.212702e-30 0 RPL24
## RPL37 8.721650e-34 0.4374005 0.987 0.942 1.196087e-29 0 RPL37
## TCF7 1.257656e-33 1.3168493 0.390 0.177 1.724750e-29 0 TCF7
## MAL 7.989692e-33 1.8871824 0.263 0.088 1.095706e-28 0 MAL
## C6orf48 2.257274e-32 0.9450448 0.699 0.491 3.095626e-28 0 C6orf48
## RPL34 1.934954e-31 0.6268241 0.931 0.902 2.653596e-27 0 RPL34
## RPSAP58 1.586125e-30 0.5962843 0.848 0.696 2.175212e-26 0 RPSAP58
## RPL27 2.169278e-30 0.3826747 0.977 0.951 2.974948e-26 0 RPL27
## NELL2 6.989473e-30 2.1982153 0.156 0.033 9.585363e-26 0 NELL2
## RPL37A 8.086232e-30 0.3776142 0.986 0.945 1.108946e-25 0 RPL37A
## CD27 1.582455e-29 0.9705380 0.399 0.183 2.170179e-25 0 CD27
## RPS4Y1 3.345104e-29 0.4904763 0.939 0.869 4.587476e-25 0 RPS4Y1
## LTB 4.574471e-29 0.4311664 0.903 0.633 6.273430e-25 0 LTB
## RPL6 1.421552e-28 0.3022301 0.999 0.992 1.949516e-24 0 RPL6
## LINC00176 8.346010e-28 2.5390247 0.146 0.031 1.144572e-23 0 LINC00176
## RPL7A 1.703449e-27 0.3293995 0.987 0.975 2.336110e-23 0 RPL7A
## LDLRAP1 2.865661e-27 2.3083384 0.240 0.087 3.929967e-23 0 LDLRAP1
## LEPROTL1 3.988222e-27 1.1531127 0.405 0.208 5.469447e-23 0 LEPROTL1
## TRABD2A 6.169276e-27 2.2714828 0.197 0.060 8.460545e-23 0 TRABD2A
## RPL15 2.248364e-25 0.2783623 0.996 0.996 3.083407e-21 0 RPL15
## SH3YL1 1.113838e-23 1.7373622 0.197 0.065 1.527517e-19 0 SH3YL1
## ADTRP 1.463116e-23 3.0056072 0.101 0.016 2.006517e-19 0 ADTRP
## AES 2.672638e-23 0.6863009 0.645 0.439 3.665256e-19 0 AES
## OXNAD1 1.164692e-22 1.7131223 0.217 0.082 1.597259e-18 0 OXNAD1
## RGCC 3.050476e-22 1.0887431 0.360 0.184 4.183423e-18 0 RGCC
## FLT3LG 4.101801e-21 1.4036755 0.282 0.134 5.625210e-17 0 FLT3LG
## RPS7 5.825889e-21 0.2832534 0.997 0.987 7.989625e-17 0 RPS7
## RPL29 9.492159e-21 0.2462272 0.994 0.984 1.301755e-16 0 RPL29
## EPHX2 1.518260e-20 2.2132803 0.121 0.030 2.082142e-16 0 EPHX2
## RPS9 1.774342e-20 0.2657734 0.999 0.986 2.433332e-16 0 RPS9
## RHOH 2.313540e-20 1.0868955 0.376 0.206 3.172789e-16 0 RHOH
## C12orf57 4.612279e-20 1.1083841 0.426 0.257 6.325279e-16 0 C12orf57
## RPL26 5.113798e-20 0.2416266 0.996 0.991 7.013063e-16 0 RPL26
## TMEM123 5.544099e-20 0.7526939 0.549 0.384 7.603177e-16 0 TMEM123
## NDFIP1 6.360731e-20 1.0778784 0.444 0.283 8.723107e-16 0 NDFIP1
## LCK 1.276374e-19 0.7912599 0.500 0.312 1.750420e-15 0 LCK
## RPL28 2.327206e-19 0.1962027 1.000 0.993 3.191530e-15 0 RPL28
## C14orf64 7.044380e-19 2.1687455 0.118 0.031 9.660663e-15 0 C14orf64
## HNRNPA1 1.189733e-18 0.3177602 0.961 0.918 1.631600e-14 0 HNRNPA1
## TOMM7 1.280213e-18 0.4789868 0.841 0.757 1.755684e-14 0 TOMM7
## GIMAP5 2.435741e-18 0.7639453 0.408 0.243 3.340375e-14 0 GIMAP5
## CD3G 2.478344e-18 0.9408833 0.335 0.178 3.398801e-14 0 CD3G
## NGFRAP1 2.739478e-18 1.0610024 0.172 0.062 3.756921e-14 0 NGFRAP1
## SCGB3A1 3.277375e-18 2.4404345 0.114 0.030 4.494593e-14 0 SCGB3A1
## IL32 7.367129e-18 0.2280903 0.772 0.474 1.010328e-13 0 IL32
## RP11-664D1.1 7.448676e-18 2.1001652 0.121 0.034 1.021511e-13 0 RP11-664D1.1
## HINT1 7.543242e-18 0.4675713 0.741 0.606 1.034480e-13 0 HINT1
## APBA2 1.376412e-17 2.2130728 0.114 0.031 1.887611e-13 0 APBA2
## SELL 3.822182e-17 0.6231188 0.595 0.429 5.241740e-13 0 SELL
## CD8B 5.024126e-17 1.4163830 0.215 0.096 6.890086e-13 0 CD8B
## NUCB2 1.057363e-16 1.3533374 0.201 0.086 1.450067e-12 0 NUCB2
## RGS10 2.710405e-16 0.6854698 0.527 0.377 3.717050e-12 0 RGS10
## TSHZ2 3.315123e-16 2.4487436 0.092 0.022 4.546360e-12 0 TSHZ2
## RP11-291B21.2 7.275789e-16 2.4564465 0.087 0.020 9.978017e-12 0 RP11-291B21.2
## MYC 1.834328e-15 1.0705858 0.236 0.116 2.515598e-11 0 MYC
## ACTN1 2.834538e-15 1.1963538 0.142 0.050 3.887285e-11 0 ACTN1
## GYPC 3.189151e-15 0.8552184 0.397 0.255 4.373602e-11 0 GYPC
## EIF4A2 3.707051e-15 0.5340309 0.738 0.606 5.083849e-11 0 EIF4A2
## AAK1 3.872967e-15 1.2490141 0.231 0.115 5.311386e-11 0 AAK1
## TRAT1 4.807266e-15 1.0646944 0.264 0.141 6.592684e-11 0 TRAT1
## RPL8 7.486744e-15 0.2331500 0.993 0.986 1.026732e-10 0 RPL8
## GNB2L1 7.876619e-15 0.2572562 0.990 0.977 1.080200e-10 0 GNB2L1
## DNAJB1 2.370874e-14 0.9701762 0.390 0.257 3.251416e-10 0 DNAJB1
## BEX2 6.175741e-14 1.0635440 0.184 0.081 8.469411e-10 0 BEX2
## CD6 1.137212e-13 1.2797712 0.185 0.084 1.559572e-09 0 CD6
## [ reached 'max' / getOption("max.print") -- omitted 11564 rows ]
cluster0.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)
VlnPlot(pbmc, features = c("MS4A1", "CD79A"))

# you can plot raw counts as well
VlnPlot(pbmc, features = c("NKG7", "PF4"), slot = "counts", log = TRUE)
## Warning: The `slot` argument of `VlnPlot()` is deprecated as of Seurat 5.0.0.
## ℹ Please use the `layer` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

FeaturePlot(pbmc, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP",
"CD8A"))

pbmc.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 10) %>%
ungroup() -> top10
DoHeatmap(pbmc, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(pbmc, features = top10$gene): The following features were omitted as they were not found in the scale.data slot for the
## RNA assay: PKIB, KLRD1, PILRA, MS4A4A, HES4, CDKN1C, CD8A, VPREB3, TRAT1, TNFRSF4, TCF7, FHIT, PIK3IP1, PRKCQ-AS1, NOSIP, LEF1, CD3E, CD3D,
## CCR7, LDHB

new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
"NK", "DC", "Platelet")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)
# pbmc <- StashIdent(pbmc, save.name = "original_cluster")
# 恢复原始 cluster
#Idents(pbmc) <- pbmc@meta.data$original_cluster
DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

head(pbmc@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.5 seurat_clusters
## AAACATACAACCAC-1 pbmc3k 2419 779 3.0177759 0 0
## AAACATTGAGCTAC-1 pbmc3k 4903 1352 3.7935958 3 3
## AAACATTGATCAGC-1 pbmc3k 3147 1129 0.8897363 2 2
## AAACCGTGCTTCCG-1 pbmc3k 2639 960 1.7430845 1 1
## AAACCGTGTATGCG-1 pbmc3k 980 521 1.2244898 6 6
## AAACGCACTGGTAC-1 pbmc3k 2163 781 1.6643551 2 2
plot <- DimPlot(pbmc, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10)))
ggsave(filename = "./images/pbmc3k_umap.jpg", height = 7, width = 12, plot = plot, quality = 50)
saveRDS(pbmc, file = "./pbmc3k_final.rds")
# test classical marker gene expression cluster
FeaturePlot(pbmc, features = c("IL7R","CCR7","CD14","LYZ","S100A4","MS4A1","CD8A","FCGR3A","MS4A7","GNLY","NKG7","FCER1A","CST3","PPBP"))

pbmc
## An object of class Seurat
## 13714 features across 2638 samples within 1 assay
## Active assay: RNA (13714 features, 2000 variable features)
## 3 layers present: counts, data, scale.data
## 2 dimensional reductions calculated: pca, umap
# Visulization part
# 随机分组 size = ncol(pbmc3k.final) -> 抽样总数为细胞数量
pbmc$groups <- sample(c("group1", "group2"), size = ncol(pbmc), replace = TRUE)
# 可视化关键marker基因
features <- c("LYZ", "CCL5", "IL32", "PTPRCAP", "FCGR3A", "PF4")
# Ridge plots - from ggridges. Visualize single cell expression distributions in each cluster
RidgePlot(pbmc, features = features, ncol = 2)
## Picking joint bandwidth of 0.318
## Picking joint bandwidth of 0.174
## Picking joint bandwidth of 0.161
## Picking joint bandwidth of 0.152
## Picking joint bandwidth of 0.0572
## Picking joint bandwidth of 0.0298

# Violin plot - Visualize single cell expression distributions in each cluster
VlnPlot(pbmc, features = features)

# Feature plot - visualize feature expression in low-dimensional space
FeaturePlot(pbmc, features = features)

# Dot plots - the size of the dot corresponds to the percentage of cells expressing the
# feature in each cluster. The color represents the average expression level
DotPlot(pbmc, features = features) + RotatedAxis()

# Single cell heatmap of feature expression
DoHeatmap(subset(pbmc, downsample = 100), features = features, size = 3)
## Warning in DoHeatmap(subset(pbmc, downsample = 100), features = features, : The following features were omitted as they were not found in the
## scale.data slot for the RNA assay: PTPRCAP

# Plot a legend to map colors to expression levels
FeaturePlot(pbmc, features = "MS4A1")

# Adjust the contrast in the plot
FeaturePlot(pbmc, features = "MS4A1", min.cutoff = 1, max.cutoff = 3)

# Calculate feature-specific contrast levels based on quantiles of non-zero expression.
# Particularly useful when plotting multiple markers
FeaturePlot(pbmc, features = c("MS4A1", "PTPRCAP"), min.cutoff = "q10", max.cutoff = "q90")

# Visualize co-expression of two features simultaneously
FeaturePlot(pbmc, features = c("MS4A1", "CD79A"), blend = TRUE)

# Split visualization to view expression by groups (replaces FeatureHeatmap)
FeaturePlot(pbmc, features = c("MS4A1", "CD79A"), split.by = "groups")& scale_color_viridis_c()
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# Violin plots can also be split on some variable. Simply add the splitting variable to object
# metadata and pass it to the split.by argument
VlnPlot(pbmc, features = "percent.mt", split.by = "groups")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.

# SplitDotPlotGG has been replaced with the `split.by` parameter for DotPlot
DotPlot(pbmc, features = features, split.by = "groups") + RotatedAxis()

# DoHeatmap now shows a grouping bar, splitting the heatmap into groups or clusters. This can
# be changed with the `group.by` parameter
DoHeatmap(pbmc, features = VariableFeatures(pbmc)[1:100], cells = 1:500, size = 4,
angle = 90) + NoLegend()

# Step for Moncle3 pseodutime analysis
library(monocle3)
pbmc <- readRDS("./output/pbmc3k_final.rds")
DimPlot(pbmc, reduction = "umap")

# 查看 Seurat 对象的所有 assay
names(pbmc)
## [1] "RNA" "RNA_nn" "RNA_snn" "pca" "umap"
# meta
colnames(pbmc@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "percent.mt" "RNA_snn_res.0.5" "seurat_clusters"
pbmc.cds <- as.cell_data_set(pbmc)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does not calculate. Please run 'cluster_cells' on your cell_data_set
## object
pbmc.cds <- cluster_cells(cds = pbmc.cds, reduction_method = "UMAP")
p1 <- plot_cells(pbmc.cds, show_trajectory_graph = FALSE)
p2 <- plot_cells(pbmc.cds, color_cells_by = "partition", show_trajectory_graph = FALSE)
wrap_plots(p1, p2)

pbmc.cds <- learn_graph(pbmc.cds, use_partition = TRUE)
## | | | 0% | |=====================================================================================================================================| 100%
## | | | 0% | |=====================================================================================================================================| 100%
## | | | 0% | |=====================================================================================================================================| 100%
plot_cells(pbmc.cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE)

plot_cells(pbmc.cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = TRUE, color_cells_by = 'seurat_clusters')

pbmc.cds <- order_cells(pbmc.cds, reduction_method = "UMAP")
##
## Listening on http://127.0.0.1:4504
# root_cell for curate initial cellList
# plot trajectories colored by pseudotime
plot_cells(
cds = pbmc.cds,
color_cells_by = "pseudotime",
show_trajectory_graph = TRUE
)
## Cells aren't colored in a way that allows them to be grouped.

# 提取伪时间值并添加到 Seurat 对象
pbmc <- AddMetaData(
object = pbmc,
metadata = pbmc.cds@principal_graph_aux@listData$UMAP$pseudotime,
col.name = "PBMC"
)
FeaturePlot(pbmc, c("PBMC"), pt.size = 0.1) & scale_color_viridis_c()
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# Slingshot
library(slingshot)
library(SingleCellExperiment)
library(RColorBrewer)
# seurat对象数据格式转换
sce <- as.SingleCellExperiment(pbmc)
sce
## class: SingleCellExperiment
## dim: 13714 2638
## metadata(0):
## assays(2): counts logcounts
## rownames(13714): AL627309.1 AP006222.2 ... PNRC2.1 SRSF10.1
## rowData names(0):
## colnames(2638): AAACATACAACCAC-1 AAACATTGAGCTAC-1 ... TTTGCATGAGAGGC-1 TTTGCATGCCTCAC-1
## colData names(8): orig.ident nCount_RNA ... PBMC ident
## reducedDimNames(2): PCA UMAP
## mainExpName: RNA
## altExpNames(0):
# slingshot分析
sce <- slingshot(data = sce,
clusterLabels = 'seurat_clusters',
reducedDim = 'UMAP',
start.clus = NULL, # 可指定起点亚群
end.clus = NULL # 可指定终点亚群
)
colnames(colData(sce))
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "percent.mt" "RNA_snn_res.0.5" "seurat_clusters"
## [7] "PBMC" "ident" "slingshot" "slingPseudotime_1" "slingPseudotime_2" "slingPseudotime_3"
## [13] "slingPseudotime_4"
summary(sce$slingPseudotime_1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 4.420 8.453 10.279 10.795 25.716 1323
pal <- c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"))
# 通过“type”参数查看基于聚类的最小生成树最初是如何估计谱系结构的
plot(reducedDims(sce)$UMAP,col = pal[sce$seurat_clusters],cex = 0.5, pch=16, asp=1)

#lin1 <- getLineages(sce,
# clusterLabels = "seurat_clusters",
#start.clus = 'Pi16',#可指定起始细胞簇
#end.clus=c("Comp","Col15a1","Ccl19", "Coch", "Cxcl12", "Fbln1", "Bmp4", "Npnt", "Hhip"),#可指定终点细胞簇
# reducedDim = "UMAP")
plot.new()
plot(reducedDims(sce)$UMAP,col = brewer.pal(10,'Paired')[sce$seurat_clusters],pch=16,asp=1)
lines(SlingshotDataSet(sce), lwd=2,col = 'black')

colnames(colData(sce))
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "percent.mt" "RNA_snn_res.0.5" "seurat_clusters"
## [7] "PBMC" "ident" "slingshot" "slingPseudotime_1" "slingPseudotime_2" "slingPseudotime_3"
## [13] "slingPseudotime_4"
# fit a GAM with a smooth spline term for pseudotime
library(gam)
sling.pseu<-data.frame(sce$ident,sce$slingPseudotime_1,sce$slingPseudotime_2,sce$slingPseudotime_3,sce$slingPseudotime_4)
rownames(sling.pseu)<-colnames(sce)
head(sling.pseu)
## sce.ident sce.slingPseudotime_1 sce.slingPseudotime_2 sce.slingPseudotime_3 sce.slingPseudotime_4
## AAACATACAACCAC-1 Naive CD4 T 6.7373710 6.6467270 6.7236016 6.8558177
## AAACATTGAGCTAC-1 B NA NA 22.0512018 NA
## AAACATTGATCAGC-1 Memory CD4 T NA NA NA 10.9656304
## AAACCGTGCTTCCG-1 CD14+ Mono 23.1584569 NA NA NA
## AAACCGTGTATGCG-1 NK 0.7451371 0.7446951 0.7457627 0.7452388
## AAACGCACTGGTAC-1 Memory CD4 T 8.7288553 NA 7.5729890 8.9297217
t <- sling.pseu$sce.slingPseudotime_4
sling.cells <- rownames(sling.pseu)[!is.na(t)]
head(sling.cells)
## [1] "AAACATACAACCAC-1" "AAACATTGATCAGC-1" "AAACCGTGTATGCG-1" "AAACGCACTGGTAC-1" "AAACGCTGACCAGT-1" "AAACGCTGGTTCTT-1"
table(sling.pseu[sling.cells,]$sce.ident)
##
## Naive CD4 T CD14+ Mono Memory CD4 T B CD8 T FCGR3A+ Mono NK DC Platelet
## 203 1 375 0 308 0 154 0 14
##gene expression along pseudotime
test<-FetchData(pbmc,c("ident","CCL5"))
test<-test[sling.cells,]
test$pseu<-sling.pseu[sling.cells,]$sce.slingPseudotime_4
ggplot(test,aes(pseu,CCL5))+ geom_point(aes(colour=factor(ident)),alpha = 0.8)+
geom_smooth(colour="gray30",size=1.2,fullrange = TRUE) +
theme_bw()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Find temporally expressed genes
# Only look at the transcription factors
# Identify the variable genes by ranking all genes by their variance.
# log data
Y <- pbmc[["RNA"]]$data
allTFs<-read.table('./TFs_hg19_RcisTarget_hTF_TFDB.txt')
allTFs<-as.character(allTFs$V1)
tf.used<-intersect(allTFs,rownames(sce))
# 取谱系中的cell数量为GMM拟合时与矩阵细胞数维度匹配
Y <- Y[tf.used, sling.cells] # only counts for TFs
dim(Y)
## [1] 1453 1055
# Fit GAM for each gene using pseudotime as independent variable.
pseu.t <- sce$slingPseudotime_4
pseu.t <- pseu.t[!is.na(pseu.t)]
smooth.data<-array(c(0),dim = c(length(sling.cells),length(tf.used)))
rownames(smooth.data)<-sling.cells
colnames(smooth.data)<-tf.used
gam.pval<-as.data.frame(array(c(0),dim = c(length(tf.used),2)))
gam.pval[,1]<-tf.used
rownames(gam.pval)<-tf.used;
colnames(gam.pval)<-c('TF_used','p_value')
for (j in 1:length(tf.used)) {
d <- data.frame(z=Y[j,], t.used=pseu.t)
tmp<-gam(Y[j,] ~ s(t.used,5), data=d)
# gam(z ~ lo(t), data=d) #loess
gam.pval[j,2]<-as.numeric(summary(tmp)[4][[1]][1,5])
smooth.data[,j]<-tmp$fitted.values
}
gam.pval<-with(gam.pval,gam.pval[order(p_value),])
topgenes <- rownames(subset(gam.pval,p_value<=1e-5)) #select significantly changed genes
heatdata <- t(smooth.data[order(pseu.t, na.last = NA),topgenes])
max.g<-apply(heatdata,1,which.max)
min.g<-apply(heatdata,1,which.min)
genes<-as.data.frame(cbind(max.g,min.g))
genes<-with(genes,genes[order(max.g,min.g),])
used.genes<-as.character(rownames(genes))
print(length(used.genes))
## [1] 40
heatdata <- t(smooth.data[order(pseu.t, na.last = NA),used.genes])
dim(heatdata)
## [1] 40 1055
library(RColorBrewer)
library(plotly)
library(pheatmap)
s=3
pheatmap(heatdata,scale = "row", cluster_rows =F,cluster_cols = F,col= colorRampPalette(c("navy","white","red"))(200),#
legend = TRUE,show_rownames = T, show_colnames = F,
border_color=NA,fontsize = s,fontsize_row = s,fontsize_col = s)

# Monocle3 tutorial
expression_matrix <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_expression.rds"))
cell_metadata <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_colData.rds"))
gene_annotation <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_rowData.rds"))
cds <- new_cell_data_set(expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
cds <- preprocess_cds(cds, num_dim = 50)
cds <- align_cds(cds, alignment_group = "batch", residual_model_formula_str = "~ bg.300.loading + bg.400.loading + bg.500.1.loading + bg.500.2.loading + bg.r17.loading + bg.b01.loading + bg.b02.loading")
## Aligning cells from different batches using Batchelor.
## Please remember to cite:
## Haghverdi L, Lun ATL, Morgan MD, Marioni JC (2018). 'Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors.' Nat. Biotechnol., 36(5), 421-427. doi: 10.1038/nbt.4091
cds <- reduce_dimension(cds)
## No preprocess_method specified, and aligned coordinates have been computed previously. Using preprocess_method = 'Aligned'
plot_cells(cds, label_groups_by_cluster=FALSE, color_cells_by = "cell.type")
## No trajectory to plot. Has learn_graph() been called yet?
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text_repel()`).

ciliated_genes <- c("che-1",
"hlh-17",
"nhr-6",
"dmd-6",
"ceh-36",
"ham-1")
plot_cells(cds,
genes=ciliated_genes,
label_cell_groups=FALSE,
show_trajectory_graph=FALSE)

cds <- cluster_cells(cds)
plot_cells(cds, color_cells_by = "partition")
## No trajectory to plot. Has learn_graph() been called yet?

cds <- learn_graph(cds)
## | | | 0% | |=====================================================================================================================================| 100%
## | | | 0% | |=====================================================================================================================================| 100%
plot_cells(cds,
color_cells_by = "cell.type",
label_groups_by_cluster=FALSE,
label_leaves=FALSE,
label_branch_points=FALSE)
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text_repel()`).

plot_cells(cds,
color_cells_by = "embryo.time.bin",
label_cell_groups=FALSE,
label_leaves=TRUE,
label_branch_points=TRUE,
graph_label_size=1.5)

cds <- order_cells(cds)
##
## Listening on http://127.0.0.1:4504
plot_cells(cds,
color_cells_by = "pseudotime",
label_cell_groups=FALSE,
label_leaves=FALSE,
label_branch_points=FALSE,
graph_label_size=1.5)
